Registration number: 19BCE1717
Faculty: Prof. Parvathi R
Slot: L55 + L56
Course code: CSE3020

Time series Lab exercise

Question

Consider the COVID-19 pandemic situation. Analysis and forecast the pandemic trend by using Covid19 dataset

  1. You want to predict the last year monthly, weekly trend based on the RTPCR response, (i.e.) No. of +Ve cases and No. –Ve cases along with admitted and discharged trend.
  2. Identify the death rate due to Covid-19
  3. Compare the last year and this year trends with respect to 1st , 2nd and 3rd wave also forecast the 4th Wave impact in India
  4. Construct the model using the Simple moving average, Exponential Moving Average and ARIMA to forecast the Covid19 Dataset.
  5. Show the all above implementation with the visual impact

Solution

Load necessary libraries:

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ROCR)
## Warning: package 'ROCR' was built under R version 4.0.2
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.2
library(devtools)
## Warning: package 'devtools' was built under R version 4.0.2
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.0.2
library(forecast)
## Warning: package 'forecast' was built under R version 4.0.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TTR)
## Warning: package 'TTR' was built under R version 4.0.2


Load the dataset:

data <- read.csv("COVIDdata.csv")
head(data)
##   iso_code continent    location       date total_cases new_cases
## 1      AFG      Asia Afghanistan 2020-02-24           5         5
## 2      AFG      Asia Afghanistan 2020-02-25           5         0
## 3      AFG      Asia Afghanistan 2020-02-26           5         0
## 4      AFG      Asia Afghanistan 2020-02-27           5         0
## 5      AFG      Asia Afghanistan 2020-02-28           5         0
## 6      AFG      Asia Afghanistan 2020-02-29           5         0
##   new_cases_smoothed total_deaths new_deaths new_deaths_smoothed
## 1                 NA           NA         NA                  NA
## 2                 NA           NA         NA                  NA
## 3                 NA           NA         NA                  NA
## 4                 NA           NA         NA                  NA
## 5                 NA           NA         NA                  NA
## 6                 NA           NA         NA                  NA
##   total_cases_per_million new_cases_per_million new_cases_smoothed_per_million
## 1                   0.126                 0.126                             NA
## 2                   0.126                 0.000                             NA
## 3                   0.126                 0.000                             NA
## 4                   0.126                 0.000                             NA
## 5                   0.126                 0.000                             NA
## 6                   0.126                 0.000                             NA
##   total_deaths_per_million new_deaths_per_million
## 1                       NA                     NA
## 2                       NA                     NA
## 3                       NA                     NA
## 4                       NA                     NA
## 5                       NA                     NA
## 6                       NA                     NA
##   new_deaths_smoothed_per_million reproduction_rate icu_patients
## 1                              NA                NA           NA
## 2                              NA                NA           NA
## 3                              NA                NA           NA
## 4                              NA                NA           NA
## 5                              NA                NA           NA
## 6                              NA                NA           NA
##   icu_patients_per_million hosp_patients hosp_patients_per_million
## 1                       NA            NA                        NA
## 2                       NA            NA                        NA
## 3                       NA            NA                        NA
## 4                       NA            NA                        NA
## 5                       NA            NA                        NA
## 6                       NA            NA                        NA
##   weekly_icu_admissions weekly_icu_admissions_per_million
## 1                    NA                                NA
## 2                    NA                                NA
## 3                    NA                                NA
## 4                    NA                                NA
## 5                    NA                                NA
## 6                    NA                                NA
##   weekly_hosp_admissions weekly_hosp_admissions_per_million new_tests
## 1                     NA                                 NA        NA
## 2                     NA                                 NA        NA
## 3                     NA                                 NA        NA
## 4                     NA                                 NA        NA
## 5                     NA                                 NA        NA
## 6                     NA                                 NA        NA
##   total_tests total_tests_per_thousand new_tests_per_thousand
## 1          NA                       NA                     NA
## 2          NA                       NA                     NA
## 3          NA                       NA                     NA
## 4          NA                       NA                     NA
## 5          NA                       NA                     NA
## 6          NA                       NA                     NA
##   new_tests_smoothed new_tests_smoothed_per_thousand positive_rate
## 1                 NA                              NA            NA
## 2                 NA                              NA            NA
## 3                 NA                              NA            NA
## 4                 NA                              NA            NA
## 5                 NA                              NA            NA
## 6                 NA                              NA            NA
##   tests_per_case tests_units total_vaccinations people_vaccinated
## 1             NA                             NA                NA
## 2             NA                             NA                NA
## 3             NA                             NA                NA
## 4             NA                             NA                NA
## 5             NA                             NA                NA
## 6             NA                             NA                NA
##   people_fully_vaccinated total_boosters new_vaccinations
## 1                      NA             NA               NA
## 2                      NA             NA               NA
## 3                      NA             NA               NA
## 4                      NA             NA               NA
## 5                      NA             NA               NA
## 6                      NA             NA               NA
##   new_vaccinations_smoothed total_vaccinations_per_hundred
## 1                        NA                             NA
## 2                        NA                             NA
## 3                        NA                             NA
## 4                        NA                             NA
## 5                        NA                             NA
## 6                        NA                             NA
##   people_vaccinated_per_hundred people_fully_vaccinated_per_hundred
## 1                            NA                                  NA
## 2                            NA                                  NA
## 3                            NA                                  NA
## 4                            NA                                  NA
## 5                            NA                                  NA
## 6                            NA                                  NA
##   total_boosters_per_hundred new_vaccinations_smoothed_per_million
## 1                         NA                                    NA
## 2                         NA                                    NA
## 3                         NA                                    NA
## 4                         NA                                    NA
## 5                         NA                                    NA
## 6                         NA                                    NA
##   new_people_vaccinated_smoothed new_people_vaccinated_smoothed_per_hundred
## 1                             NA                                         NA
## 2                             NA                                         NA
## 3                             NA                                         NA
## 4                             NA                                         NA
## 5                             NA                                         NA
## 6                             NA                                         NA
##   stringency_index population population_density median_age aged_65_older
## 1             8.33   39835428             54.422       18.6         2.581
## 2             8.33   39835428             54.422       18.6         2.581
## 3             8.33   39835428             54.422       18.6         2.581
## 4             8.33   39835428             54.422       18.6         2.581
## 5             8.33   39835428             54.422       18.6         2.581
## 6             8.33   39835428             54.422       18.6         2.581
##   aged_70_older gdp_per_capita extreme_poverty cardiovasc_death_rate
## 1         1.337       1803.987              NA               597.029
## 2         1.337       1803.987              NA               597.029
## 3         1.337       1803.987              NA               597.029
## 4         1.337       1803.987              NA               597.029
## 5         1.337       1803.987              NA               597.029
## 6         1.337       1803.987              NA               597.029
##   diabetes_prevalence female_smokers male_smokers handwashing_facilities
## 1                9.59             NA           NA                 37.746
## 2                9.59             NA           NA                 37.746
## 3                9.59             NA           NA                 37.746
## 4                9.59             NA           NA                 37.746
## 5                9.59             NA           NA                 37.746
## 6                9.59             NA           NA                 37.746
##   hospital_beds_per_thousand life_expectancy human_development_index
## 1                        0.5           64.83                   0.511
## 2                        0.5           64.83                   0.511
## 3                        0.5           64.83                   0.511
## 4                        0.5           64.83                   0.511
## 5                        0.5           64.83                   0.511
## 6                        0.5           64.83                   0.511
##   excess_mortality_cumulative_absolute excess_mortality_cumulative
## 1                                   NA                          NA
## 2                                   NA                          NA
## 3                                   NA                          NA
## 4                                   NA                          NA
## 5                                   NA                          NA
## 6                                   NA                          NA
##   excess_mortality excess_mortality_cumulative_per_million
## 1               NA                                      NA
## 2               NA                                      NA
## 3               NA                                      NA
## 4               NA                                      NA
## 5               NA                                      NA
## 6               NA                                      NA


dim(data)
## [1] 167246     67


There are over 1.6L records in this dataset. Plotting graphs will be extremely computationally intensive, thus, I have chosen to select a subset of the data to work with. For some parts I have used Indian data while for some other cases, I used UK. The reason for using two subset of data is to show trend in India and compare with other countries on the planet, and also for some attributes Indian subset of data does not have any records; however, UK does. TO show variety I have adopted this method.
Subsetting to Indian region:

data <- data[data$location == "India",]
data$date <- as.Date(data$date, format = "%Y-%m-%d")
head(data)
##       iso_code continent location       date total_cases new_cases
## 70066      IND      Asia    India 2020-01-30           1         1
## 70067      IND      Asia    India 2020-01-31           1         0
## 70068      IND      Asia    India 2020-02-01           1         0
## 70069      IND      Asia    India 2020-02-02           2         1
## 70070      IND      Asia    India 2020-02-03           3         1
## 70071      IND      Asia    India 2020-02-04           3         0
##       new_cases_smoothed total_deaths new_deaths new_deaths_smoothed
## 70066                 NA           NA         NA                  NA
## 70067                 NA           NA         NA                  NA
## 70068                 NA           NA         NA                  NA
## 70069                 NA           NA         NA                  NA
## 70070                 NA           NA         NA                  NA
## 70071                 NA           NA         NA                  NA
##       total_cases_per_million new_cases_per_million
## 70066                   0.001                 0.001
## 70067                   0.001                 0.000
## 70068                   0.001                 0.000
## 70069                   0.001                 0.001
## 70070                   0.002                 0.001
## 70071                   0.002                 0.000
##       new_cases_smoothed_per_million total_deaths_per_million
## 70066                             NA                       NA
## 70067                             NA                       NA
## 70068                             NA                       NA
## 70069                             NA                       NA
## 70070                             NA                       NA
## 70071                             NA                       NA
##       new_deaths_per_million new_deaths_smoothed_per_million reproduction_rate
## 70066                     NA                              NA                NA
## 70067                     NA                              NA                NA
## 70068                     NA                              NA                NA
## 70069                     NA                              NA                NA
## 70070                     NA                              NA                NA
## 70071                     NA                              NA                NA
##       icu_patients icu_patients_per_million hosp_patients
## 70066           NA                       NA            NA
## 70067           NA                       NA            NA
## 70068           NA                       NA            NA
## 70069           NA                       NA            NA
## 70070           NA                       NA            NA
## 70071           NA                       NA            NA
##       hosp_patients_per_million weekly_icu_admissions
## 70066                        NA                    NA
## 70067                        NA                    NA
## 70068                        NA                    NA
## 70069                        NA                    NA
## 70070                        NA                    NA
## 70071                        NA                    NA
##       weekly_icu_admissions_per_million weekly_hosp_admissions
## 70066                                NA                     NA
## 70067                                NA                     NA
## 70068                                NA                     NA
## 70069                                NA                     NA
## 70070                                NA                     NA
## 70071                                NA                     NA
##       weekly_hosp_admissions_per_million new_tests total_tests
## 70066                                 NA        NA          NA
## 70067                                 NA        NA          NA
## 70068                                 NA        NA          NA
## 70069                                 NA        NA          NA
## 70070                                 NA        NA          NA
## 70071                                 NA        NA          NA
##       total_tests_per_thousand new_tests_per_thousand new_tests_smoothed
## 70066                       NA                     NA                 NA
## 70067                       NA                     NA                 NA
## 70068                       NA                     NA                 NA
## 70069                       NA                     NA                 NA
## 70070                       NA                     NA                 NA
## 70071                       NA                     NA                 NA
##       new_tests_smoothed_per_thousand positive_rate tests_per_case tests_units
## 70066                              NA            NA             NA            
## 70067                              NA            NA             NA            
## 70068                              NA            NA             NA            
## 70069                              NA            NA             NA            
## 70070                              NA            NA             NA            
## 70071                              NA            NA             NA            
##       total_vaccinations people_vaccinated people_fully_vaccinated
## 70066                 NA                NA                      NA
## 70067                 NA                NA                      NA
## 70068                 NA                NA                      NA
## 70069                 NA                NA                      NA
## 70070                 NA                NA                      NA
## 70071                 NA                NA                      NA
##       total_boosters new_vaccinations new_vaccinations_smoothed
## 70066             NA               NA                        NA
## 70067             NA               NA                        NA
## 70068             NA               NA                        NA
## 70069             NA               NA                        NA
## 70070             NA               NA                        NA
## 70071             NA               NA                        NA
##       total_vaccinations_per_hundred people_vaccinated_per_hundred
## 70066                             NA                            NA
## 70067                             NA                            NA
## 70068                             NA                            NA
## 70069                             NA                            NA
## 70070                             NA                            NA
## 70071                             NA                            NA
##       people_fully_vaccinated_per_hundred total_boosters_per_hundred
## 70066                                  NA                         NA
## 70067                                  NA                         NA
## 70068                                  NA                         NA
## 70069                                  NA                         NA
## 70070                                  NA                         NA
## 70071                                  NA                         NA
##       new_vaccinations_smoothed_per_million new_people_vaccinated_smoothed
## 70066                                    NA                             NA
## 70067                                    NA                             NA
## 70068                                    NA                             NA
## 70069                                    NA                             NA
## 70070                                    NA                             NA
## 70071                                    NA                             NA
##       new_people_vaccinated_smoothed_per_hundred stringency_index population
## 70066                                         NA            10.19 1393409033
## 70067                                         NA            10.19 1393409033
## 70068                                         NA            10.19 1393409033
## 70069                                         NA            10.19 1393409033
## 70070                                         NA            10.19 1393409033
## 70071                                         NA            10.19 1393409033
##       population_density median_age aged_65_older aged_70_older gdp_per_capita
## 70066            450.419       28.2         5.989         3.414       6426.674
## 70067            450.419       28.2         5.989         3.414       6426.674
## 70068            450.419       28.2         5.989         3.414       6426.674
## 70069            450.419       28.2         5.989         3.414       6426.674
## 70070            450.419       28.2         5.989         3.414       6426.674
## 70071            450.419       28.2         5.989         3.414       6426.674
##       extreme_poverty cardiovasc_death_rate diabetes_prevalence female_smokers
## 70066            21.2                282.28               10.39            1.9
## 70067            21.2                282.28               10.39            1.9
## 70068            21.2                282.28               10.39            1.9
## 70069            21.2                282.28               10.39            1.9
## 70070            21.2                282.28               10.39            1.9
## 70071            21.2                282.28               10.39            1.9
##       male_smokers handwashing_facilities hospital_beds_per_thousand
## 70066         20.6                  59.55                       0.53
## 70067         20.6                  59.55                       0.53
## 70068         20.6                  59.55                       0.53
## 70069         20.6                  59.55                       0.53
## 70070         20.6                  59.55                       0.53
## 70071         20.6                  59.55                       0.53
##       life_expectancy human_development_index
## 70066           69.66                   0.645
## 70067           69.66                   0.645
## 70068           69.66                   0.645
## 70069           69.66                   0.645
## 70070           69.66                   0.645
## 70071           69.66                   0.645
##       excess_mortality_cumulative_absolute excess_mortality_cumulative
## 70066                                   NA                          NA
## 70067                                   NA                          NA
## 70068                                   NA                          NA
## 70069                                   NA                          NA
## 70070                                   NA                          NA
## 70071                                   NA                          NA
##       excess_mortality excess_mortality_cumulative_per_million
## 70066               NA                                      NA
## 70067               NA                                      NA
## 70068               NA                                      NA
## 70069               NA                                      NA
## 70070               NA                                      NA
## 70071               NA                                      NA
data$newNegativeCases <- data$new_tests - data$new_cases 
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.0.2


Now we will see plots for question 1 and 2 mentioned above. The following are plots exploring the positive cases, negative cases, hospitalisation, death rates, positive rates, etc with respect to time


The positive cases in India over time:

#data %>% drop_na(new_cases)
data <- data[complete.cases(data$new_cases),]
ggplot(data = data, aes(x = date, y = new_cases)) +geom_point() + labs(x = "Date", y = "trend", title = "New positive Cases",)


New negative cases over time in India

ggplot(data = data, aes(x = date, y = newNegativeCases)) +geom_point() +  labs(x = "Date", y = "new negative cases",title = "New negative Cases",)
## Warning: Removed 76 rows containing missing values (geom_point).


Linear model:

model <- lm(data$new_cases~data$new_deaths+data$new_tests+data$positive_rate)
summary(model)
## 
## Call:
## lm(formula = data$new_cases ~ data$new_deaths + data$new_tests + 
##     data$positive_rate)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -82559 -14127   2503  15743  85887 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -7.730e+04  2.506e+03  -30.85   <2e-16 ***
## data$new_deaths    -6.941e-04  1.826e+00    0.00        1    
## data$new_tests      5.511e-02  2.022e-03   27.25   <2e-16 ***
## data$positive_rate  1.435e+06  2.930e+04   48.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26020 on 686 degrees of freedom
##   (79 observations deleted due to missingness)
## Multiple R-squared:  0.9028, Adjusted R-squared:  0.9024 
## F-statistic:  2124 on 3 and 686 DF,  p-value: < 2.2e-16


Death rate due to COVID over time:

data$deathrate <- data$new_deaths/data$new_cases
library("ggplot2")
ggplot(data = data, aes(x = date, y = deathrate)) + geom_point() + labs(x = "Date",y = "Death Rate", title = "Death Rate due to covid",)
## Warning: Removed 46 rows containing missing values (geom_point).


Total number of cases in India over time:

newcase_ts <- ts(data$new_cases)
ggplot(data = data, aes(x = date, y = total_cases)) + geom_point() + labs(x = "Date", y = "number of cases", title = "Total number of cases wrt time")


Total number of negative cases in India over time:

ggplot(data = data, aes(x = date, y = newNegativeCases )) + geom_point() + labs(x = "Date", y = "Negative cases", title = "Total number of negative cases wrt time")
## Warning: Removed 76 rows containing missing values (geom_point).


new_cases_forecasts <- HoltWinters(newcase_ts, beta = FALSE, gamma = FALSE)
plot(new_cases_forecasts)


Now we shall see the scenario in the UK
Load the UK subset

df <- read.csv("COVIDdata.csv")
data <- df[df$location == "United Kingdom",]
data$date <- as.Date(data$date, format = "%Y-%m-%d")


New cases in UK over time

ggplot(data = data, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = "New cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).


Number of deaths over time: (cumulative)

ggplot(data = data, aes(x = date, y = total_deaths)) + geom_point() + labs(x = "Date", y = "Total deaths", title = "Total number of deaths wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).


Rate of postivity over time in the UK

ggplot(data = data, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "rate of positive", title = "Positive rates wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 91 rows containing non-finite values (stat_smooth).
## Warning: Removed 91 rows containing missing values (geom_point).


Now that the visualisation for the first 2 questions are one, we shall see the next question - comparison between waves.

Here, I have assumed that the first wave is 2020, second is 2021 and third is 2022. This may not be the case in reality as the time period for each wave is much shorter. But I chose this to see the trend in the entire year. Also, it is very difficult to identify the start and end date of a wave. Many economists are not able to agree on common dates, thus, I chose to do based on the year. However, the code is same if we change the time period of the waves too.

Positive rates comparison between the thre waves of COVID-19 in the UK

first <- data
first <- first[first$date >= "2020-01-25" & first$date <= "2020-12-28", ]

second <- data
second <- second[second$date >= "2021-01-01" & second$date <= "2021-12-31", ]

third <- data
third <- third[third$date >= "2022-01-25" & third$date <= "2022-12-28", ]

ggplot(data = first, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = "number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = second, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = "number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

ggplot(data = third, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = "number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).


We see that the number of new cases rose towards the end of 2020. Then in the second wave (2021) it began to decrease, but again rose towards the end of the year. Finally in the year 2022, it seems to be decreasing so far


Positive rates comparison between the thre waves of COVID-19 in the UK

ggplot(data = first, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "positive rates", title = "Positive rates wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing missing values (geom_point).

ggplot(data = second, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "Cumulative cases", title = "Positive rates wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).

ggplot(data = third, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "Cumulative cases", title = "Positive rates wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).


We notice that the positive rate was very high in 2020 May. THen it decreased over time and then again it began to rise and in the year 2021, it was very high and ths again went on to reduce. By mid 2021, this began rising once again and in the year 2022, it is decreasing so far. This is because when the number of cases (previous graphs) rose the positivty also rose. It decreased when the other decreased.


New deaths comparison between the thre waves of COVID-19 in the UK

ggplot(data = first, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "deaths", title = "Total number of deaths wrt time") + geom_smooth(se=F) + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).

ggplot(data = second, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "deaths", title = "Total number of deaths wrt time") + geom_smooth(se=F) + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = third, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "deaths", title = "Total number of deaths wrt time") + geom_smooth(se=F) + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).


This also follows the same trend as the number of new cases and positivity.

We can see the same trends being followed in India as well.

Positive rates comparison between the thre waves of COVID-19 in India

data <- read.csv("COVIDdata.csv")
data <- data[data$location == "India",]
data$date <- as.Date(data$date, format = "%Y-%m-%d")

first <- data
first <- first[first$date >= "2020-01-25" & first$date <= "2020-12-28", ]

second <- data
second <- second[second$date >= "2021-01-01" & second$date <= "2021-12-31", ]

third <- data
third <- third[third$date >= "2022-01-25" & third$date <= "2022-12-28", ]

ggplot(data = first, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = " number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = second, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = " number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = third, aes(x = date, y = new_cases)) + geom_point() + labs(x = "Date", y = "new cases", title = " number of new cases wrt time") + geom_smooth(se=F)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).


The number of cases slowly increased in the second half of 2020. Then it decreased by december and january in 2021. But then we went into 2nd wave of COVID where it peaked once again in March and April. This decreased over time in the rest of the year, and this decrease is being observed in 2022 also.
Positive rates comparison between the three waves of COVID-19 in India

ggplot(data = first, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "positive cases", title = "Positive rates wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 50 rows containing non-finite values (stat_smooth).
## Warning: Removed 50 rows containing missing values (geom_point).

ggplot(data = second, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "positive cases", title = "Positive rates wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = third, aes(x = date, y = positive_rate)) + geom_point() + labs(x = "Date", y = "positive cases", title = "Positive rates wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).


As mentioned above, the similar trend is being followed by positive rates too

New deaths comparison between the thre waves of COVID-19 in India

ggplot(data = first, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "Deaths", title = "Total number of Deaths wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 42 rows containing non-finite values (stat_smooth).
## Warning: Removed 42 rows containing missing values (geom_point).

ggplot(data = second, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "Deaths", title = "Total number of Deaths wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(data = third, aes(x = date, y = new_deaths)) + geom_point() + labs(x = "Date", y = "Deaths", title = "Total number of Deaths wrt time") + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).


As mentioned above, the similar trend is being followed by deaths too.



Now, we will move to question 4 and 5

Forecasting:

new_cases_forecasts2 <- forecast:::forecast.HoltWinters(new_cases_forecasts, h=30)
plot(forecast(new_cases_forecasts2))


SMA:

data <- read.csv("COVIDdata.csv")
data <- data[data$location == "India",]
data$date <- as.Date(data$date, format = "%Y-%m-%d")

data <- data[complete.cases(data$new_cases),]

new_cases_rep <- ts(data$new_cases)
new_cases_sma <- SMA(new_cases_rep,n=3)
plot(new_cases_sma)


ma2 <- SMA(data$new_cases, level=.95, frequency=2)
ma2
##   [1]       NA       NA       NA       NA       NA       NA       NA       NA
##   [9]       NA      0.3      0.2      0.2      0.2      0.1      0.0      0.0
##  [17]      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0
##  [25]      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0
##  [33]      0.2      0.2      2.5      2.7      2.8      3.1      3.6      4.0
##  [41]      5.3      5.9      6.8      7.7      7.4      8.3      8.8     10.8
##  [49]     11.7     15.1     18.8     26.8     32.3     41.7     43.4     54.4
##  [57]     60.8     74.5     83.1     83.0    100.7    106.7    160.2    204.4
##  [65]    203.1    242.5    286.1    389.1    432.4    489.2    547.4    620.1
##  [73]    644.8    666.2    788.6    840.5    873.4    865.2    904.1    980.6
##  [81]   1089.0   1094.1   1163.4   1216.5   1262.4   1304.3   1396.1   1446.0
##  [89]   1509.9   1560.2   1544.7   1632.4   1717.7   1832.9   1942.8   2190.7
##  [97]   2311.7   2509.7   2690.0   2837.1   2974.6   3229.8   3351.1   3459.3
## [105]   3555.0   3556.0   3638.4   3766.1   3934.7   4063.3   4366.7   4486.7
## [113]   4745.8   5050.2   5336.8   5653.9   5916.6   6014.5   6238.8   6505.8
## [121]   6701.6   6979.9   7238.3   7357.6   7576.8   7828.8   8176.3   8539.1
## [129]   8853.6   9210.0   9243.7   9431.9   9599.6   9916.5  10180.2  10409.8
## [137]  10571.1  10690.7  10744.3  10946.0  11460.4  11890.2  12384.6  12774.7
## [145]  13122.2  13526.1  14068.1  14731.0  15488.8  16191.3  16778.6  17179.2
## [153]  17503.0  17935.9  18532.9  19213.2  20006.0  20701.2  21071.1  21355.8
## [161]  21897.8  22696.2  23543.5  24488.1  25271.0  25843.7  26301.6  27144.4
## [169]  28416.8  29666.7  31048.5  32440.4  33442.2  34355.6  36054.4  38135.6
## [177]  40084.3  41677.8  43178.4  44098.9  45388.8  46375.7  47940.8  50291.0
## [185]  51192.5  51558.7  51872.1  52261.9  52892.0  54700.1  55694.2  57104.7
## [193]  58032.8  57268.7  57891.5  59294.2  60544.5  61766.8  62541.6  62058.9
## [201]  61407.0  61424.3  62185.1  63715.0  64606.3  64830.3  64515.8  64140.1
## [209]  63459.5  66257.1  68481.9  69671.9  70580.8  71542.0  71546.5  72458.3
## [217]  74705.8  76942.4  79863.2  80357.7  81711.3  81645.0  82739.5  84461.8
## [225]  87124.8  89046.1  90095.0  90968.0  90705.7  90654.8  91364.0  93425.5
## [233]  93788.6  93475.6  92516.6  90267.9  89165.4  88609.1  88833.4  88357.3
## [241]  87427.9  86002.5  83727.7  82514.4  82500.4  83140.5  82753.4  81685.5
## [249]  80524.5  78115.0  76459.9  76095.3  76086.0  75366.0  74122.2  72647.0
## [257]  70233.6  69001.6  68328.2  68538.6  67554.9  65889.6  64412.2  61764.0
## [265]  59730.1  58640.8  58543.2  57529.3  55771.4  53949.1  51374.9  49577.1
## [273]  48993.0  49178.8  48601.2  47713.6  46800.1  45294.1  44906.5  45412.7
## [281]  46529.5  47175.8  46755.1  46480.6  45461.1  45192.9  45460.3  46117.2
## [289]  45960.3  45049.3  43340.3  41221.0  40515.3  40482.6  41263.5  41458.6
## [297]  41189.0  41107.0  40436.1  40763.7  42157.8  43549.7  43820.2  43443.6
## [305]  42732.6  41221.2  40360.7  39509.9  39371.9  38599.5  37751.7  36741.6
## [313]  35266.1  34293.1  33568.0  33393.5  32736.2  32206.5  31254.1  29795.4
## [321]  28832.5  27935.4  27567.7  26874.9  26385.2  25881.6  24834.1  24203.7
## [329]  23967.8  24068.0  23657.1  23129.3  22842.4  21970.4  21362.9  21111.4
## [337]  21159.3  20672.2  20018.7  19362.4  18772.6  18708.2  18740.7  18911.4
## [345]  16856.5  18361.0  17988.6  17339.1  17118.2  17162.4  17083.9  16790.9
## [353]  16270.7  15835.6  16840.6  14535.5  14428.8  14624.9  14453.7  14244.0
## [361]  14005.3  13399.7  13154.2  12942.0  13822.5  13749.1  13529.1  13218.2
## [369]  12656.1  12275.1  12244.7  12575.3  12477.7  12517.0  11814.6  11417.4
## [377]  11219.7  11368.4  11435.8  11546.2  11475.7  11399.8  11140.6  11095.7
## [385]  11200.7  11609.0  11901.6  12035.7  12524.7  12368.8  12523.6  13032.5
## [393]  13778.1  14265.9  14653.0  14884.7  14714.0  14786.5  15107.3  15732.7
## [401]  16186.9  16388.5  16590.7  16480.7  16597.6  17332.0  18431.9  19421.2
## [409]  20212.5  21157.8  21778.6  22793.5  24520.7  26954.5  29257.7  31356.9
## [417]  33723.5  35306.8  37501.0  40219.5  43682.1  47017.6  49701.9  52531.3
## [425]  54057.1  55020.5  57558.4  61633.5  65820.2  69797.5  74241.5  77713.9
## [433]  83016.1  88893.0  96468.7 105659.1 113714.0 122458.6 129719.3 138831.6
## [441] 148549.7 160586.8 172482.4 185942.9 200126.3 211504.6 225732.5 240305.7
## [449] 257424.2 273665.6 288560.8 302124.6 310957.7 320911.0 331461.6 344200.4
## [457] 354883.9 362668.3 366182.2 367235.2 370480.7 376424.7 385541.2 389556.3
## [465] 391966.0 389959.9 382754.8 378348.1 377814.8 376397.6 370792.8 360666.7
## [473] 347386.5 333632.0 320024.9 310986.5 303947.4 294835.2 282646.7 270563.8
## [481] 257596.7 247371.8 240363.0 232646.1 223291.7 212236.0 201554.3 188575.4
## [489] 177770.0 168953.9 162547.6 153708.4 144024.6 135451.8 126722.6 119426.9
## [497] 113499.8 109977.9 105132.3  99800.3  93606.0  87600.2  82376.6  79033.8
## [505]  76632.0  73447.7  69924.0  66039.8  61870.6  58872.0  57236.8  56356.4
## [513]  55003.8  53287.0  51653.8  49335.1  48107.6  47641.3  48039.0  47365.3
## [521]  46265.5  45078.4  43678.9  43048.2  43022.6  43605.3  43286.8  42558.8
## [529]  41612.5  40492.0  40071.4  40265.1  40689.7  40124.3  39650.8  39127.9
## [537]  37860.6  37911.5  38334.4  38578.0  38601.2  38402.1  38443.3  37604.3
## [545]  37854.0  38388.5  39802.2  39765.6  39810.4  40289.6  39434.8  39723.1
## [553]  40085.2  41580.6  41078.0  40634.1  39761.0  38416.5  38068.7  38174.8
## [561]  39131.9  38736.1  38046.2  36875.6  35529.4  35140.2  35230.4  36067.1
## [569]  35677.5  34652.8  33148.0  31828.0  31979.0  33301.7  35250.9  36409.0
## [577]  37277.2  37911.0  37559.4  38661.1  40863.1  42851.6  43354.1  43014.3
## [585]  42443.3  40889.6  40168.8  40204.2  40607.4  39748.5  37898.4  36088.6
## [593]  34367.2  32808.2  31970.4  32288.5  32067.2  30818.2  30346.5  29620.4
## [601]  29457.7  29924.6  30522.4  30766.4  30542.0  29705.8  28019.1  26828.8
## [609]  26156.1  26217.3  25956.3  25048.2  23989.9  22862.9  21913.6  21552.6
## [617]  21798.8  21885.8  21349.5  20490.0  19485.9  18784.0  18602.8  18454.4
## [625]  18169.2  17340.7  16574.6  15906.4  15552.1  15584.3  15731.6  15781.9
## [633]  15473.8  15218.2  14862.9  14793.4  15049.4  15178.4  15147.4  14585.0
## [641]  14257.8  13667.5  13267.2  13125.1  13155.2  12903.0  12370.0  12083.0
## [649]  11664.3  11527.9  11585.6  11794.9  11789.6  11628.2  11378.2  11171.8
## [657]  11108.9  11153.0  11251.0  11134.6  10874.3  10471.5  10044.4   9845.6
## [665]   9734.6   9903.0   9715.1   9400.6   9120.9   8789.7   8636.3   8764.0
## [673]   8927.7   8859.7   7947.8   8613.0   8463.4   8429.9   8540.9   8682.1
## [681]   8585.7   8396.9   8210.3   7928.4   8626.8   7704.1   7766.6   6922.7
## [689]   6695.3   7219.5   6953.1   6797.1   6811.6   6898.2   6918.7   6820.0
## [697]   6728.4   7364.2   7569.2   7520.2   8664.0  10309.8  12315.6  15025.6
## [705]  18044.6  23155.6  31595.3  42669.5  55948.6  70596.4  86892.3 101421.1
## [713] 118137.8 139504.5 162186.8 183260.4 201287.8 215386.7 224989.9 237323.7
## [721] 251104.6 269023.7 283322.1 291933.7 296119.9 294824.0 296295.2 299124.7
## [729] 300443.8 295700.0 287374.9 273641.3 256576.8 239362.1 225999.0 215351.0
## [737] 199554.8 181663.8 164930.5 148137.0 131845.4 117562.0 106663.8  95565.9
## [745]  82810.3  71282.2  61227.9  53542.0  48230.1  44062.4  39152.9  34441.3
## [753]  30238.7  26538.5  23561.0  21564.5  20140.2  18228.6  16180.2  14389.5
## [761]  12854.0  11612.6  10663.6   9962.7   9044.6   8177.4   7297.0   6546.4
## [769]   5976.6
fma2 <- forecast(ma2, h=30)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
fma2
##     Point Forecast       Lo 80     Hi 80        Lo 95      Hi 95
## 770      5426.9388    3915.039  6938.838    3114.6880   7739.190
## 771      4896.7398    1564.054  8229.426    -200.1638   9993.643
## 772      4385.2993   -1115.800  9886.399   -4027.9067  12798.505
## 773      3891.9539   -4053.862 11837.770   -8260.1232  16044.031
## 774      3416.0631   -7201.486 14033.612  -12822.0767  19654.203
## 775      2957.0095  -10522.835 16436.854  -17658.6333  23572.652
## 776      2514.1974  -13990.136 19018.530  -22727.0017  27755.397
## 777      2087.0522  -17581.084 21755.188  -27992.7665  32166.871
## 778      1675.0195  -21277.303 24627.342  -33427.5280  36777.567
## 779      1277.5647  -25063.355 27618.485  -39007.3959  41562.525
## 780       894.1719  -28926.085 30714.429  -44711.9752  46500.319
## 781       524.3438  -32854.146 33902.834  -50523.6516  51572.339
## 782       167.6003  -36837.666 37172.867  -56427.0712  56762.272
## 783      -176.5216  -40867.988 40514.945  -62408.7492  62055.706
## 784      -508.4682  -44937.474 43920.538  -68456.7697  67439.833
## 785      -828.6705  -49039.355 47382.014  -74560.5493  72903.208
## 786     -1137.5439  -53167.600 50892.512  -80710.6481  78435.560
## 787     -1435.4892  -57316.825 54445.846  -86898.6166  84027.638
## 788     -1722.8931  -61482.202 58036.416  -93116.8694  89671.083
## 789     -2000.1286  -65659.398 61659.141  -99358.5801  95358.323
## 790     -2267.5554  -69844.513 65309.402 -105617.5929 101082.482
## 791     -2525.5206  -74034.030 68982.989 -111888.3477 106837.307
## 792     -2774.3588  -78224.777 72676.059 -118165.8155 112617.098
## 793     -3014.3931  -82413.889 76385.103 -124445.4430 118416.657
## 794     -3245.9348  -86598.776 80106.906 -130723.1044 124231.235
## 795     -3469.2845  -90777.096 83838.527 -136995.0597 130056.491
## 796     -3684.7321  -94946.732 87577.268 -143257.9172 135888.453
## 797     -3892.5570  -99105.771 91320.657 -149508.6012 141723.487
## 798     -4093.0290 -103252.480 95066.422 -155744.3228 147558.265
## 799     -4286.4083 -107385.299 98812.482 -161962.5547 153389.738
plot(fma2)


ARIMA:

arima_model <- auto.arima(new_cases_rep)
summary(arima_model)
## Series: new_cases_rep 
## ARIMA(3,1,3) 
## 
## Coefficients:
##          ar1      ar2     ar3      ma1     ma2      ma3
##       0.4607  -0.5189  0.9037  -0.2701  0.4596  -0.6868
## s.e.  0.0275   0.0332  0.0227   0.0599  0.0612   0.0310
## 
## sigma^2 estimated as 55449628:  log likelihood=-7934.61
## AIC=15883.23   AICc=15883.37   BIC=15915.73
## 
## Training set error measures:
##                     ME     RMSE     MAE MPE MAPE     MASE       ACF1
## Training set -2.633866 7412.482 4109.46 NaN  Inf 0.882021 0.06448811


SMA_model <- SMA(new_cases_rep,n=3)
summary(SMA_model)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0   10994   29097   56025   59169  409232       2


EMA:

EMA_model <- EMA(new_cases_rep,n=3)
summary(EMA_model)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0   10815   28960   56025   58850  404008       2


Construct the model using the Simple moving average, Exponential Moving Average and ARIMA to forecast the Covid19 Dataset.

autoplot(acf(na.omit(SMA_model),plot=FALSE))+ labs(title="ACF Plot")


autoplot(pacf(na.omit(SMA_model),plot=FALSE))+ labs(title="PACF Plot")


autoplot(acf(na.omit(EMA_model),plot=FALSE))+ labs(title="ACF Plot")


autoplot(pacf(na.omit(EMA_model),plot=FALSE))+ labs(title="PACF Plot")